* Dataproc_twosample.do
* 2014.11.05
* Last update 2015.07.08: restricting sample to prime-age workers

* ---- Session setup ----
* Close any log left open by a previous run (capture: no error if none is open)
capture log close
clear
set more off

* Timer 1 brackets the whole run; it is stopped and listed at the bottom of the file
timer clear 1
timer on 1

* Project root: every data and log path in this file is built from this macro
local work "/DIRECTORY"

log using "`work'/logs/Dataproc_twosample.log", replace


**************************
* ATUS
**************************
* Build one county-level ATUS cross section per employment group. The loop value
* selects the sample: "all" (everything except trdpftpt == -1), "full"
* (trdpftpt == 1 with non-missing weekly earnings), "part" (trdpftpt == 2) and
* "non" (trdpftpt == -1). Each pass saves data/Twosample_<group>.dta.
foreach person in  "all" "full" "part" "non" {
   use "`work'/data/atus_proc.dta", clear

   * Sample restriction for this employment group
   if "`person'" == "full" {
      keep if !missing(wkly_wage) & trdpftpt == 1
   }
   else if "`person'" == "part" {
      keep if trdpftpt == 2
   }
   else if "`person'" == "non" {
      keep if trdpftpt == -1
   }
   else if "`person'" == "all" {
      keep if trdpftpt != -1
   }
   * Restrictions common to all groups: sleep above 2*7 and age strictly
   * between 17 and 65 (a missing age is dropped as well)
   keep if sleep > 2*7
   keep if age > 17 & age < 65

   * Variable handling
   gen pop_density = pop_2010/area_mi
   gen tq = qofd(date)
   format tq %tq
   * Rebuild the numeric county identifier from the string fips code
   drop FIPS
   encode fips, generate(FIPS)
   * fips2: modal state-level code (state_fips followed by 000) within each county;
   * maxmode picks the largest value if several modes tie
   gen fips_temp = state_fips + "000"
   bysort fips: egen fips2 = mode(fips_temp), maxmode
   drop fips_temp
   gen obs=1
   * ATUS denotes missing as -1 and we have divided the native variable by 100, so missing is then -.01
   replace wage = . if wage<0
   gen hrly_obs = !missing(wage)
   * hs: peeduca 31-39 (hs or less); college: peeduca 40-46 (some college or more)
   gen hs = inrange(peeduca, 31, 39)
   gen college = inrange(peeduca, 40, 46)
   gen other_time_exnaps = other_time - nap_sst1

   * Remove any pre-existing age-bin variables (age_cut, age_cut_#, age_cut2,
   * age_cut2_#) so the gen, egen and tab calls below cannot fail on an existing
   * name. The original pattern here was a typo with a doubled asterisk; it now
   * matches the pattern used by the other sections of this file.
   capture drop age_cut*
   gen age_cut = 1 if inrange(age, 0, 24)
   replace age_cut = 2 if inrange(age, 25, 34)
   replace age_cut = 3 if inrange(age, 35, 44)
   replace age_cut = 4 if inrange(age, 45, 54)
   replace age_cut = 5 if inrange(age, 55, 150)
   tab age_cut, gen(age_cut_)
   * capture: age2 is only created when it is not already in the data
   capture gen age2 = age*age
   * Second set of bins: five groups of roughly equal size
   egen age_cut2 = cut(age) if age > 17 & age < 65, group(5)
   tab age_cut2, gen(age_cut2_)
   gen married = (pemaritl != 6)
   * Weekly earnings split by whether an hourly wage is reported
   gen salary = wkly_wage if wage == .
   gen wkly_wage_hour = wkly_wage if wage != .
   gen part_time = (trdpftpt == 2)

   * Aggregate in three passes: county-quarter (to match QCEW), then county-year,
   * then the county cross section. The variable lists are shared by all three
   * passes; only the first pass creates mean_age and med_age from age.
   local mean_pre tudiaryday_* holiday bedtime waketime other_time_exnaps work sleep sleep_base gender race_* educ_* ///
          primary_occupation_* primary_industry_* sunset_time_avg wkly_wage wage
   local mean_post age2 tehruslt trdpftpt hs college years_educ part_time ///
          age_cut* married trchildnum time_c_* salary wkly_wage_hour
   local geo FIPS fips fips2 coast_dist latitude longitude time_zone gereg_*

   * Collapse to quarter-year (to match QCEW)
   collapse (mean) `mean_pre' mean_age=age `mean_post' ///
          (median) med_age=age (sum) obs hrly_obs, by(tq year `geo')
   * Collapse to year
   collapse (mean) `mean_pre' mean_age `mean_post' ///
          (median) med_age (sum) obs hrly_obs, by(year `geo')
   * Collapse to cross section
   collapse (mean) `mean_pre' mean_age `mean_post' ///
          (median) med_age (sum) obs hrly_obs, by(`geo')

   * Post-collapse variable handling
   gen ln_wkly_wage = log(wkly_wage)
   gen ln_wage = log(wage)
   gen ln_salary = ln(salary)
   gen ln_wkly_wage_hour = ln(wkly_wage_hour)
   * Population data: merge on the county code first; the second merge on fips2
   * uses the update option, so it only fills values that are still missing
   merge m:1 fips using "`work'/data/demographics/CountiesDB_subset.dta", keep(1 3 4 5) keepusing(pop_density pop_2010) update nogenerate
   merge m:1 fips2 using "`work'/data/demographics/CountiesDB_subset.dta", keep(1 3 4 5) keepusing(pop_density pop_2010) update nogenerate
   gen ln_pop_density = log(pop_density)
   * group = 2 marks ATUS records
   gen group = 2

   * Merging coastal counties: a record is coastal when its fips is found in
   * coastal_counties.dta, or when fips equals fips2 and fips2 is found in
   * coastal_states.dta
   merge m:1 fips using "`work'/data/coastal_counties.dta", keep(1 3)
   gen coastal = (_merge == 3)
   drop _merge
   merge m:1 fips2 using "`work'/data/coastal_states.dta"
   replace coastal = 1 if (_merge == 3 & fips==fips2)
   drop if _merge ==2
   drop _merge

   * Merging QOL index
   merge m:1 fips2 using "`work'/data/qol/QOL_index.dta", keep(1 3) nogenerate

   * Saving: keep the analysis variables and the four time zones P, M, C, E
   keep tudiaryday_* holiday bedtime waketime other_time_exnaps work sleep sleep_base ln_wkly_wage ln_wage wkly_wage wage coast_dist coastal latitude longitude *age* mean_age gender race_* educ_* gereg_* ///
          primary_occupation_* primary_industry_* trdpftpt years_educ hs college pop_density ln_pop_density pop_2010 sunset_time_avg time_zone fips fips2 group obs hrly_obs tehruslt WRLURI capital_stock qol married trchildnum ///
          time_c_* *salary *wkly_wage_hour part_time
   describe
   keep if group==2 & inlist(time_zone, "P", "M", "C", "E")

   * Variable handling: linear splines in latitude, population density and coast distance
   mkspline lat_10_ 10 = latitude
   mkspline pd_ 5 = pop_density
   mkspline cd_ 3 = coast_dist
   gen lnsleep = log(sleep)
   encode time_zone, generate(ATUStzcode)
   * state_coded: the record carries a state-level code (fips equals fips2)
   gen state_coded = (fips == fips2)
   gen coast = ((coastal == 1 & coast_dist < 1) | (coast_dist < 1 & state_coded == 1))
   gen cc = coast_dist*coast

   * Variable labeling
   label variable ln_wkly_wage "ln(earnings)"
   label variable sleep "Sleep"
   label variable sunset_time_avg "Avg. sunset time"
   label variable lnsleep "ln(sleep)"
   label variable sleep_base "Sleep and naps"

   saveold "`work'/data/Twosample_`person'.dta", replace
}


**************************
* QCEW
**************************
use "`work'/data/qcew/QCEW_master.dta", clear

* Harmonise names: each QCEW variable takes the name of its ATUS counterpart
rename AverageWeeklyWage10 wkly_wage
rename median_age age
* confirmed female=1 in both data sets
rename pct_female gender
* Race shares
rename pct_white race_1
rename pct_black race_2
rename pct_asian race_3
rename pct_other race_4
* Education shares: educ_1 pools pct_lessthan9thgr and pct_gr9to12
gen educ_1 = pct_lessthan9thgr + pct_gr9to12
drop pct_lessthan9thgr pct_gr9to12
rename pct_HSgrad educ_2
rename pct_some_college educ_3
rename pct_BA_BS educ_4
rename pct_grad_prof_degree educ_5
* Record counter, summed by the collapses below
gen obs=1

* Aggregate in two passes (county-year, then county cross section) with shared lists
local qcew_means gender race_* educ_* sunset_time_avg wkly_wage pop_density naics_* occ_*
local qcew_geo fips coast_dist latitude longitude tzcode
* Collapse to year
collapse (mean) `qcew_means' (median) med_age=age (sum) AvgEmployment10 obs, by(year `qcew_geo')
* Collapse to cross section
collapse (mean) `qcew_means' (median) med_age (sum) AvgEmployment10 obs, by(`qcew_geo')

* Post-collapse variable handling; group = 1 marks QCEW records
gen ln_wkly_wage = ln(wkly_wage)
gen ln_pop_density = ln(pop_density)
gen group = 1


**************************
* Combined
**************************
* Stack an ATUS cross section (group 2) under the QCEW cross section (group 1)
* that is currently in memory.
* NOTE(review): the macro ATUS was never defined anywhere in this do-file, so the
* append received an empty file name and stopped the run at this point. It is now
* set to the full-time file written by the ATUS loop above (Twosample_full.dta).
* Confirm that this is the intended ATUS sample for the combined file.
local ATUS "`work'/data/Twosample_full.dta"
append using "`ATUS'"
* Log the number of records per source
tab group
* Labels for the time-use category variables
label var time_c_2 "Sleep"
label var time_c_1 "Work"
label var time_c_3 "Home production"
label var time_c_4 "Leisure"


* Saving
save "`work'/data/Twosample.dta", replace


*****************************
* ATUS - alternative samples
*****************************

*** No weekends ***
* Full-time workers with weekly earnings, restricted to diaries with positive
* time_c_1 and aggregated by county and diary day (tudiaryday is a by-variable).
* NOTE(review): the filter is on time_c_1 > 0, not on the diary day itself; confirm
* that this is the intended definition of the no-weekends sample.
use "`work'/data/atus_proc.dta", clear

* Sample restrictions: positive time_c_1, full-time with weekly earnings,
* age strictly between 17 and 65, sleep above 2*7
keep if time_c_1 > 0
keep if !missing(wkly_wage) & trdpftpt == 1
keep if age > 17 & age < 65
keep if sleep > 2*7

* Variable handling
gen pop_density = pop_2010/area_mi
gen tq = qofd(date)
format tq %tq
* Rebuild the numeric county identifier from the string fips code
drop FIPS
encode fips, generate(FIPS)
* fips2: modal state-level code (state_fips followed by 000) within each county.
* NOTE(review): without maxmode, egen mode() returns missing when modes tie.
gen fips_temp = state_fips + "000"
bysort fips: egen fips2 = mode(fips_temp)
drop fips_temp
gen obs=1
* ATUS denotes missing as -1 and we have divided the native variable by 100, so missing is then -.01
replace wage = . if wage<0
gen hrly_obs = !missing(wage)

* Age bins: drop any existing age_cut variables, then rebuild them
capture drop age_cut*
gen age_cut = 1 if inrange(age, 0, 24)
replace age_cut = 2 if inrange(age, 25, 34)
replace age_cut = 3 if inrange(age, 35, 44)
replace age_cut = 4 if inrange(age, 45, 54)
replace age_cut = 5 if inrange(age, 55, 150)
tab age_cut, gen(age_cut_)
* capture: age2 is only created when it is not already in the data
capture gen age2 = age*age
egen age_cut2 = cut(age) if age > 17 & age < 65, group(5)
tab age_cut2, gen(age_cut2_)

* Aggregate in three passes with shared lists: county-quarter (to match QCEW),
* county-year, then cross section. Only the first pass builds mean_age and med_age.
local mean_pre time_c_* sleep sleep_base gender race_* educ_* primary_occupation_* primary_industry_* sunset_time_avg wkly_wage wage
local mean_post age2 age_cut* tehruslt trdpftpt years_educ
local geo FIPS fips fips2 coast_dist latitude longitude time_zone gereg_* tudiaryday
collapse (mean) `mean_pre' mean_age=age `mean_post' (median) med_age=age (sum) obs hrly_obs, by(tq year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(`geo')

* Post-collapse variable handling
gen ln_wkly_wage = ln(wkly_wage)
gen ln_wage = ln(wage)
* Population density: merge on the county code, then fill remaining gaps via fips2
foreach key in fips fips2 {
   merge m:1 `key' using "`work'/data/demographics/CountiesDB_subset.dta", keep(1 3 4 5) keepusing(pop_density) update nogenerate
}
gen ln_pop_density = ln(pop_density)
* group = 2 marks ATUS records
gen group = 2

* Merging coastal counties: coastal when fips is found in coastal_counties.dta, or
* when fips equals fips2 and fips2 is found in coastal_states.dta
merge m:1 fips using "`work'/data/coastal_counties.dta", keep(1 3)
gen coastal = (_merge == 3)
drop _merge
merge m:1 fips2 using "`work'/data/coastal_states.dta"
replace coastal = 1 if (_merge == 3 & fips==fips2)
drop if _merge ==2
drop _merge

* Saving
save "`work'/data/ATUS_xsec_nowkends.dta", replace


*** Workers including part-timers ***
* NOTE(review): this section was headed "Only part-time workers", but the code
* applies no trdpftpt filter: it keeps every record with non-missing weekly
* earnings, and the output file is named wPT. Confirm which sample is intended.
use "`work'/data/atus_proc.dta", clear

* Sample restrictions: weekly earnings reported, age strictly between 17 and 65,
* sleep above 2*7
keep if !missing(wkly_wage)
keep if age > 17 & age < 65
keep if sleep > 2*7

* Variable handling
gen pop_density = pop_2010/area_mi
gen tq = qofd(date)
format tq %tq
* Rebuild the numeric county identifier from the string fips code
drop FIPS
encode fips, generate(FIPS)
* fips2: modal state-level code (state_fips followed by 000) within each county.
* NOTE(review): without maxmode, egen mode() returns missing when modes tie.
gen fips_temp = state_fips + "000"
bysort fips: egen fips2 = mode(fips_temp)
drop fips_temp
gen obs=1
* ATUS denotes missing as -1 and we have divided the native variable by 100, so missing is then -.01
replace wage = . if wage<0
gen hrly_obs = !missing(wage)

* Age bins: drop any existing age_cut variables, then rebuild them
capture drop age_cut*
gen age_cut = 1 if inrange(age, 0, 24)
replace age_cut = 2 if inrange(age, 25, 34)
replace age_cut = 3 if inrange(age, 35, 44)
replace age_cut = 4 if inrange(age, 45, 54)
replace age_cut = 5 if inrange(age, 55, 150)
tab age_cut, gen(age_cut_)
* capture: age2 is only created when it is not already in the data
capture gen age2 = age*age
egen age_cut2 = cut(age) if age > 17 & age < 65, group(5)
tab age_cut2, gen(age_cut2_)

* Aggregate in three passes with shared lists: county-quarter (to match QCEW),
* county-year, then cross section. Only the first pass builds mean_age and med_age.
local mean_pre sleep sleep_base gender race_* educ_* primary_occupation_* primary_industry_* sunset_time_avg wkly_wage wage
local mean_post age2 tehruslt trdpftpt years_educ age_cut*
local geo FIPS fips fips2 coast_dist latitude longitude time_zone gereg_*
collapse (mean) `mean_pre' mean_age=age `mean_post' (median) med_age=age (sum) obs hrly_obs, by(tq year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(`geo')

* Post-collapse variable handling
gen ln_wkly_wage = ln(wkly_wage)
gen ln_wage = ln(wage)
* Population density: merge on the county code, then fill remaining gaps via fips2
foreach key in fips fips2 {
   merge m:1 `key' using "`work'/data/demographics/CountiesDB_subset.dta", keep(1 3 4 5) keepusing(pop_density) update nogenerate
}
gen ln_pop_density = ln(pop_density)
* group = 2 marks ATUS records
gen group = 2

* Merging coastal counties: coastal when fips is found in coastal_counties.dta, or
* when fips equals fips2 and fips2 is found in coastal_states.dta
merge m:1 fips using "`work'/data/coastal_counties.dta", keep(1 3)
gen coastal = (_merge == 3)
drop _merge
merge m:1 fips2 using "`work'/data/coastal_states.dta"
replace coastal = 1 if (_merge == 3 & fips==fips2)
drop if _merge ==2
drop _merge

* Saving
save "`work'/data/ATUS_xsec_wPT.dta", replace


*** High work hours, low sleep ***
* Full-time workers with weekly earnings whose work is at least 8*7 and whose
* sleep is at most 6*7. No minimum-sleep filter is applied in this section.
use "`work'/data/atus_proc.dta", clear

* Subset to high hours
* work above 75th pctile and sleep below 10th
gen highworkhrs = (work>=8*7 & sleep<=6*7) if !missing(work)
keep if highworkhrs==1
* Subset to workers aged strictly between 17 and 65
keep if !missing(wkly_wage) & trdpftpt == 1
keep if age > 17 & age < 65

* Variable handling
gen pop_density = pop_2010/area_mi
gen tq = qofd(date)
format tq %tq
* Rebuild the numeric county identifier from the string fips code
drop FIPS
encode fips, generate(FIPS)
* fips2: modal state-level code (state_fips followed by 000) within each county.
* NOTE(review): without maxmode, egen mode() returns missing when modes tie.
gen fips_temp = state_fips + "000"
bysort fips: egen fips2 = mode(fips_temp)
drop fips_temp
gen obs=1
* ATUS denotes missing as -1 and we have divided the native variable by 100, so missing is then -.01
replace wage = . if wage<0
gen hrly_obs = !missing(wage)
* hs: peeduca 31-39 (hs or less); college: peeduca 40-46 (some college or more)
gen hs = inrange(peeduca, 31, 39)
gen college = inrange(peeduca, 40, 46)
gen other_time_exnaps = other_time - nap_sst1

* Age bins: drop any existing age_cut variables, then rebuild them
capture drop age_cut*
gen age_cut = 1 if inrange(age, 0, 24)
replace age_cut = 2 if inrange(age, 25, 34)
replace age_cut = 3 if inrange(age, 35, 44)
replace age_cut = 4 if inrange(age, 45, 54)
replace age_cut = 5 if inrange(age, 55, 150)
tab age_cut, gen(age_cut_)
* capture: age2 is only created when it is not already in the data
capture gen age2 = age*age
egen age_cut2 = cut(age) if age > 17 & age < 65, group(5)
tab age_cut2, gen(age_cut2_)

* Aggregate in three passes with shared lists: county-quarter (to match QCEW),
* county-year, then cross section. Only the first pass builds mean_age and med_age.
local mean_pre other_time_exnaps work sleep sleep_base gender race_* educ_* primary_occupation_* primary_industry_* sunset_time_avg wkly_wage wage
local mean_post age2 age_cut* tehruslt trdpftpt hs college years_educ
local geo FIPS fips fips2 coast_dist latitude longitude time_zone gereg_*
collapse (mean) `mean_pre' mean_age=age `mean_post' (median) med_age=age (sum) obs hrly_obs, by(tq year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(`geo')

* Post-collapse variable handling
gen ln_wkly_wage = ln(wkly_wage)
gen ln_wage = ln(wage)
* Population density: merge on the county code, then fill remaining gaps via fips2
foreach key in fips fips2 {
   merge m:1 `key' using "`work'/data/demographics/CountiesDB_subset.dta", keep(1 3 4 5) keepusing(pop_density) update nogenerate
}
gen ln_pop_density = ln(pop_density)
* group = 2 marks ATUS records
gen group = 2

* Merging coastal counties: coastal when fips is found in coastal_counties.dta, or
* when fips equals fips2 and fips2 is found in coastal_states.dta
merge m:1 fips using "`work'/data/coastal_counties.dta", keep(1 3)
gen coastal = (_merge == 3)
drop _merge
merge m:1 fips2 using "`work'/data/coastal_states.dta"
replace coastal = 1 if (_merge == 3 & fips==fips2)
drop if _merge ==2
drop _merge

* Saving
save "`work'/data/ATUS_xsec_hihrs_lowsleep.dta", replace


*** Low wage ***
* Full-time workers with weekly earnings whose ln_wkly_wage (as stored in
* atus_proc.dta) is at most 5.66.
use "`work'/data/atus_proc.dta", clear

* Subset to low wage
* wage below 10th pctile
gen lowwage = (ln_wkly_wage<=5.66) if !missing(ln_wkly_wage)
keep if lowwage==1
* Subset to workers aged strictly between 17 and 65 with sleep above 13.66
keep if !missing(wkly_wage) & trdpftpt == 1
keep if age > 17 & age < 65
keep if sleep > 13.66

* Variable handling
gen pop_density = pop_2010/area_mi
gen tq = qofd(date)
format tq %tq
* Rebuild the numeric county identifier from the string fips code
drop FIPS
encode fips, generate(FIPS)
* fips2: modal state-level code (state_fips followed by 000) within each county.
* NOTE(review): without maxmode, egen mode() returns missing when modes tie.
gen fips_temp = state_fips + "000"
bysort fips: egen fips2 = mode(fips_temp)
drop fips_temp
gen obs=1
* ATUS denotes missing as -1 and we have divided the native variable by 100, so missing is then -.01
replace wage = . if wage<0
gen hrly_obs = !missing(wage)
* hs: peeduca 31-39 (hs or less); college: peeduca 40-46 (some college or more)
gen hs = inrange(peeduca, 31, 39)
gen college = inrange(peeduca, 40, 46)
gen other_time_exnaps = other_time - nap_sst1

* Age bins: drop any existing age_cut variables, then rebuild them
capture drop age_cut*
gen age_cut = 1 if inrange(age, 0, 24)
replace age_cut = 2 if inrange(age, 25, 34)
replace age_cut = 3 if inrange(age, 35, 44)
replace age_cut = 4 if inrange(age, 45, 54)
replace age_cut = 5 if inrange(age, 55, 150)
tab age_cut, gen(age_cut_)
* capture: age2 is only created when it is not already in the data
capture gen age2 = age*age
egen age_cut2 = cut(age) if age > 17 & age < 65, group(5)
tab age_cut2, gen(age_cut2_)

* Aggregate in three passes with shared lists: county-quarter (to match QCEW),
* county-year, then cross section. Only the first pass builds mean_age and med_age.
local mean_pre other_time_exnaps work sleep sleep_base gender race_* educ_* primary_occupation_* primary_industry_* sunset_time_avg wkly_wage wage
local mean_post age2 age_cut* tehruslt trdpftpt hs college years_educ
local geo FIPS fips fips2 coast_dist latitude longitude time_zone gereg_*
collapse (mean) `mean_pre' mean_age=age `mean_post' (median) med_age=age (sum) obs hrly_obs, by(tq year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(`geo')

* Post-collapse variable handling (the logs are recomputed from the collapsed means)
gen ln_wkly_wage = ln(wkly_wage)
gen ln_wage = ln(wage)
* Population density: merge on the county code, then fill remaining gaps via fips2
foreach key in fips fips2 {
   merge m:1 `key' using "`work'/data/demographics/CountiesDB_subset.dta", keep(1 3 4 5) keepusing(pop_density) update nogenerate
}
gen ln_pop_density = ln(pop_density)
* group = 2 marks ATUS records
gen group = 2

* Merging coastal counties: coastal when fips is found in coastal_counties.dta, or
* when fips equals fips2 and fips2 is found in coastal_states.dta
merge m:1 fips using "`work'/data/coastal_counties.dta", keep(1 3)
gen coastal = (_merge == 3)
drop _merge
merge m:1 fips2 using "`work'/data/coastal_states.dta"
replace coastal = 1 if (_merge == 3 & fips==fips2)
drop if _merge ==2
drop _merge

* Saving
save "`work'/data/ATUS_xsec_lowwage.dta", replace


*** Hourly wage ***
* Full-time workers with a non-missing ln_wage (as stored in atus_proc.dta).
* Unlike the other samples, weekly earnings are not required to be non-missing.
use "`work'/data/atus_proc.dta", clear

* Subset to hourly workers who are full-time, aged strictly between 17 and 65,
* with sleep above 13.66
keep if !missing(ln_wage) & trdpftpt == 1
keep if age > 17 & age < 65
keep if sleep > 13.66

* Variable handling
gen pop_density = pop_2010/area_mi
gen tq = qofd(date)
format tq %tq
* Rebuild the numeric county identifier from the string fips code
drop FIPS
encode fips, generate(FIPS)
* fips2: modal state-level code (state_fips followed by 000) within each county.
* NOTE(review): without maxmode, egen mode() returns missing when modes tie.
gen fips_temp = state_fips + "000"
bysort fips: egen fips2 = mode(fips_temp)
drop fips_temp
gen obs=1
* ATUS denotes missing as -1 and we have divided the native variable by 100, so missing is then -.01
replace wage = . if wage<0
gen hrly_obs = !missing(wage)
* hs: peeduca 31-39 (hs or less); college: peeduca 40-46 (some college or more)
gen hs = inrange(peeduca, 31, 39)
gen college = inrange(peeduca, 40, 46)
gen other_time_exnaps = other_time - nap_sst1

* Age bins: drop any existing age_cut variables, then rebuild them
capture drop age_cut*
gen age_cut = 1 if inrange(age, 0, 24)
replace age_cut = 2 if inrange(age, 25, 34)
replace age_cut = 3 if inrange(age, 35, 44)
replace age_cut = 4 if inrange(age, 45, 54)
replace age_cut = 5 if inrange(age, 55, 150)
tab age_cut, gen(age_cut_)
* capture: age2 is only created when it is not already in the data
capture gen age2 = age*age
egen age_cut2 = cut(age) if age > 17 & age < 65, group(5)
tab age_cut2, gen(age_cut2_)

* Aggregate in three passes with shared lists: county-quarter (to match QCEW),
* county-year, then cross section. Only the first pass builds mean_age and med_age.
local mean_pre other_time_exnaps work sleep sleep_base gender race_* educ_* primary_occupation_* primary_industry_* sunset_time_avg wkly_wage wage
local mean_post age2 age_cut* tehruslt trdpftpt hs college years_educ
local geo FIPS fips fips2 coast_dist latitude longitude time_zone gereg_*
collapse (mean) `mean_pre' mean_age=age `mean_post' (median) med_age=age (sum) obs hrly_obs, by(tq year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(`geo')

* Post-collapse variable handling (the logs are recomputed from the collapsed means)
gen ln_wkly_wage = ln(wkly_wage)
gen ln_wage = ln(wage)
* Population density: merge on the county code, then fill remaining gaps via fips2
foreach key in fips fips2 {
   merge m:1 `key' using "`work'/data/demographics/CountiesDB_subset.dta", keep(1 3 4 5) keepusing(pop_density) update nogenerate
}
gen ln_pop_density = ln(pop_density)
* group = 2 marks ATUS records
gen group = 2

* Merging coastal counties: coastal when fips is found in coastal_counties.dta, or
* when fips equals fips2 and fips2 is found in coastal_states.dta
merge m:1 fips using "`work'/data/coastal_counties.dta", keep(1 3)
gen coastal = (_merge == 3)
drop _merge
merge m:1 fips2 using "`work'/data/coastal_states.dta"
replace coastal = 1 if (_merge == 3 & fips==fips2)
drop if _merge ==2
drop _merge

* Saving
save "`work'/data/ATUS_xsec_hrly.dta", replace


*** No high-wage cities ***
* Full-time workers with weekly earnings, after removing counties flagged as
* belonging to selected CSAs.
use "`work'/data/atus_proc.dta", clear

* Dummies for high-wage cities, keyed on the numeric CSA code (missing when CSA is missing)
destring CSA, replace
gen boston=(CSA==715) if !missing(CSA)
gen nyc=(CSA==408) if !missing(CSA)
gen seattle=(CSA==500) if !missing(CSA)
gen sanfrancisco=(CSA==488) if !missing(CSA)
gen losangeles=(CSA==348) if !missing(CSA)
gen chicago=(CSA==176) if !missing(CSA)
gen dc=(CSA==548) if !missing(CSA)
* Within a county, carry each flag forward onto records whose CSA is missing
* (missing CSA sorts last within fips)
sort fips CSA
foreach city in boston nyc seattle sanfrancisco losangeles chicago dc {
   bysort fips: replace `city' = `city'[_n-1] if missing(`city')
}
* NOTE(review): seattle and dc are flagged above but are not part of this drop;
* confirm whether they should be excluded as well.
drop if (boston==1) | (nyc==1) | (chicago==1) | (sanfrancisco==1) | (losangeles==1)

* Subset to workers aged strictly between 17 and 65 with sleep above 13.66
keep if !missing(wkly_wage) & trdpftpt == 1
keep if age > 17 & age < 65
keep if sleep > 13.66

* Variable handling
gen pop_density = pop_2010/area_mi
gen tq = qofd(date)
format tq %tq
* Rebuild the numeric county identifier from the string fips code
drop FIPS
encode fips, generate(FIPS)
* fips2: modal state-level code (state_fips followed by 000) within each county.
* NOTE(review): without maxmode, egen mode() returns missing when modes tie.
gen fips_temp = state_fips + "000"
bysort fips: egen fips2 = mode(fips_temp)
drop fips_temp
gen obs=1
* ATUS denotes missing as -1 and we have divided the native variable by 100, so missing is then -.01
replace wage = . if wage<0
gen hrly_obs = !missing(wage)
* hs: peeduca 31-39 (hs or less); college: peeduca 40-46 (some college or more)
gen hs = inrange(peeduca, 31, 39)
gen college = inrange(peeduca, 40, 46)
gen other_time_exnaps = other_time - nap_sst1

* Age bins: drop any existing age_cut variables, then rebuild them
capture drop age_cut*
gen age_cut = 1 if inrange(age, 0, 24)
replace age_cut = 2 if inrange(age, 25, 34)
replace age_cut = 3 if inrange(age, 35, 44)
replace age_cut = 4 if inrange(age, 45, 54)
replace age_cut = 5 if inrange(age, 55, 150)
tab age_cut, gen(age_cut_)
* capture: age2 is only created when it is not already in the data
capture gen age2 = age*age
egen age_cut2 = cut(age) if age > 17 & age < 65, group(5)
tab age_cut2, gen(age_cut2_)

* Aggregate in three passes with shared lists: county-quarter (to match QCEW),
* county-year, then cross section. Only the first pass builds mean_age and med_age.
local mean_pre other_time_exnaps work sleep sleep_base gender race_* educ_* primary_occupation_* primary_industry_* sunset_time_avg wkly_wage wage
local mean_post age2 age_cut* tehruslt trdpftpt hs college years_educ
local geo FIPS fips fips2 coast_dist latitude longitude time_zone gereg_*
collapse (mean) `mean_pre' mean_age=age `mean_post' (median) med_age=age (sum) obs hrly_obs, by(tq year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(year `geo')
collapse (mean) `mean_pre' mean_age `mean_post' (median) med_age (sum) obs hrly_obs, by(`geo')

* Post-collapse variable handling
gen ln_wkly_wage = ln(wkly_wage)
gen ln_wage = ln(wage)
* Population density: merge on the county code, then fill remaining gaps via fips2
foreach key in fips fips2 {
   merge m:1 `key' using "`work'/data/demographics/CountiesDB_subset.dta", keep(1 3 4 5) keepusing(pop_density) update nogenerate
}
gen ln_pop_density = ln(pop_density)
* group = 2 marks ATUS records
gen group = 2

* Merging coastal counties: coastal when fips is found in coastal_counties.dta, or
* when fips equals fips2 and fips2 is found in coastal_states.dta
merge m:1 fips using "`work'/data/coastal_counties.dta", keep(1 3)
gen coastal = (_merge == 3)
drop _merge
merge m:1 fips2 using "`work'/data/coastal_states.dta"
replace coastal = 1 if (_merge == 3 & fips==fips2)
drop if _merge ==2
drop _merge

* Saving
save "`work'/data/ATUS_xsec_noHWcities.dta", replace







* Stop timer 1 (started at the top of the file) and print the elapsed run time
timer off 1
timer list 1
* Close the run log (capture: no error if no log is open)
capture log close



